load packages

load object

fileNam <- "/Users/immbio/Desktop/HumanHeartCarTrans2/data/Human_heart_ExpDH.rds"
seuratExpDH <- readRDS(fileNam)
seuratExpDH$clusterName <- "clusterName"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "0" )] <- "Fb1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "1" )] <- "PerivFb1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "2" )] <- "Mph2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "3" )] <- "BEC1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "4" )] <- "Fb2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "5" )] <- "CM"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "6" )] <- "Tcell1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "7" )] <- "BEC2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "8" )] <- "VSMC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "9" )] <- "Mph1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "10" )] <- "BEC3"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "11" )] <- "NC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "12" )] <- "BaroRec"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "13" )] <- "Bcell"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "14" )] <- "Fb3"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "15" )] <- "Tcell2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "16" )] <- "LEC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "17" )] <- "PerivFb2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "18" )] <- "Adipoc"

colclusterName <- c("#67001f", "#f4a582","#D53E4F", "#B45B5C","#003c30","#01665e","#66C2A5", "#BEAEF8","#BEAED4", "#c7eae5", "#B09C85", "#4e5a4c","#393A3F","pink","#4588CA","#3299CA","#FCC80B","#FEE60B","#628395")
names(colclusterName) <- c("CM","Fb1","Fb2","Fb3","PerivFb1","PerivFb2","VSMC","BEC1","BEC2","BEC3","LEC","NC","BaroRec","Adipoc","Mph1","Mph2","Tcell1","Tcell2","Bcell")

coldiseaseCond <- c("#dfc27d","#BE3144","#f4a582","#B45B5C","#8c510a","#202547","#355C7D","#779d8d", "#01665e", "#3288BD", "#BEAED4") 
names(coldiseaseCond) <- c("donorheart", "explant", "visit1", "visit2", "visit3", "visit4", "visit5", "visitX1", "visitX2", "visitX3", "visitX4")

# Combine slots
seuratExpDH$patient_diseaseCond <- paste0(seuratExpDH$patient, '_', seuratExpDH$diseaseCond)
#table(seuratExpDH$patient_diseaseCond)

seuratExpDH$patient_clusterName <- paste0(seuratExpDH$patient, '_', seuratExpDH$clusterName)
#table(seuratExpDH$patient_clusterName)

seuratExpDH$diseaseCond_clusterName <- paste0(seuratExpDH$diseaseCond, '_', seuratExpDH$clusterName)
table(seuratExpDH$diseaseCond_clusterName)
## 
##   donorheart_Adipoc  donorheart_BaroRec    donorheart_Bcell     donorheart_BEC1     donorheart_BEC2 
##                  78                 270                  88                5834                1743 
##     donorheart_BEC3       donorheart_CM      donorheart_Fb1      donorheart_Fb2      donorheart_Fb3 
##                 963                6129                8213                 912                 296 
##      donorheart_LEC     donorheart_Mph1     donorheart_Mph2       donorheart_NC donorheart_PerivFb1 
##                  97                 462                4097                 579                8851 
## donorheart_PerivFb2   donorheart_Tcell1   donorheart_Tcell2     donorheart_VSMC      explant_Adipoc 
##                 273                 550                  70                1118                 665 
##     explant_BaroRec       explant_Bcell        explant_BEC1        explant_BEC2        explant_BEC3 
##                1874                 927               15142                4267                1297 
##          explant_CM         explant_Fb1         explant_Fb2         explant_Fb3         explant_LEC 
##                5984               20784               16498                1515                 899 
##        explant_Mph1        explant_Mph2          explant_NC    explant_PerivFb1    explant_PerivFb2 
##                2323               11455                1432               14914                 629 
##      explant_Tcell1      explant_Tcell2        explant_VSMC 
##                4203                 703                2425

Run marker Genes

# Set factor levels and identities
seuratExpDH$clusterName <- factor(seuratExpDH$clusterName, levels = c("CM","Fb1","Fb2","Fb3","PerivFb1","PerivFb2","VSMC","BEC1","BEC2","BEC3","LEC","NC","BaroRec","Adipoc","Mph1","Mph2","Tcell1","Tcell2","Bcell"))
Idents(seuratExpDH) <- seuratExpDH$clusterName

genes <- data.frame(gene=rownames(seuratExpDH)) %>% 
  mutate(geneID=sub("^.*\\.", "", gene))

# Create selGenes with geneID vector, join, and filter
selGenes <- data.frame(geneID = rev(c("TTN", "MYBPC3", "RYR2", "NEBL", "TNNT2", "CMYA5", "COL6A3", "DCN",  "FBN1", "C7", "PDGFRA", "CDH19", "PDGFRB","ITGA7","RGS5", "NOTCH3", "MYH11", "ACTA2","PECAM1", "VWF", "EGFL7", "POSTN", "ITGA10", "CDH11","CCL21", "PROX1", "FLT4", "NRXN1", "ANK3", "PTPRZ1", "ASIC2", "PIEZO2", "MTHFD1L", "ACACB", "PLIN1", "GPAM", "CD163", "MRC1", "SIGLEC1", "STAB1", "CSF1R", "MERTK", "IL7R", "PTPRC", "CD2", "KIT", "CPA3", "IL18R1", "MS4A1", "PAX5", "FCRL5","IGKC"))) %>%
  left_join(., genes, by = "geneID") %>%
  filter(gene != "ENSG00000232995.RGS5")

# Run dot plot
p <-dittoSeq::dittoDotPlot(seuratExpDH, vars = selGenes$gene, group.by = "clusterName") + RotatedAxis() + coord_flip() + scale_color_viridis(option= "F")

#Differential analysis using muscat https://github.com/HelenaLC/muscat

#Preparation: Set as ScExperiment
sceExpDH <- as.SingleCellExperiment(seuratExpDH)
#Compute sum factors and normalize
sceExpDH <- computeLibraryFactors(sceExpDH)
sceExpDH <- logNormCounts(sceExpDH)

# Step 1: Add metadata to colData
colData(sceExpDH)$sample_id <- sceExpDH$Sample
colData(sceExpDH)$cluster_id <- sceExpDH$clusterName
colData(sceExpDH)$group_id <- sceExpDH$diseaseCond

# Step 2: Standardize structure
sceExpDH <- prepSCE(sceExpDH,
    kid = "cluster_id",
    gid = "group_id",
    sid = "sample_id")

# Step 3: Pseudobulk aggregation
pb <- aggregateData(sceExpDH, 
    assay = "counts", fun = "sum",
    by = c("cluster_id", "sample_id"))


# run pseudobulk (aggregation-based) DS analysis (analyze differential gene expression in specific cell subpopulations or states, comparing conditions)
ds_pb <- pbDS(pb, method = "edgeR")
##   |                                                                                                  |                                                                                          |   0%  |                                                                                                  |=====                                                                                     |   5%  |                                                                                                  |=========                                                                                 |  11%  |                                                                                                  |==============                                                                            |  16%  |                                                                                                  |===================                                                                       |  21%  |                                                                                                  |========================                                                                  |  26%  |                                                                                                  |============================                                                              |  32%  |                                                                                                  |=================================                                                         |  37%  |                                                                                                  |======================================                                                    |  42%  |                                                                                                  |===========================================                                               |  47%  |                                                                                                  |===============================================                                           |  53%  |                                                                                                  |====================================================                                      |  58%  |                                                                                                  |=========================================================                                 |  63%  |                                                                                                  |==============================================================                            |  68%  |                                                                                                  |==================================================================                        |  74%  |                                                                                                  |=======================================================================                   |  79%  |                                                                                                  |============================================================================              |  84%  |                                                                                                  |=================================================================================         |  89%  |                                                                                                  |=====================================================================================     |  95%  |                                                                                                  |==========================================================================================| 100%
#print(ds_pb)

#run pseudobulk multidimensional scaling (explore and visualize global patterns of expression similarity/differences across samples or groups)
pb_mds <- pbMDS(pb)
# use very distinctive shaping of groups & change cluster colors
#pb_mds <- pb_mds + 
#  scale_shape_manual(values = c(17, 4)) +
#  scale_color_manual(values = RColorBrewer::brewer.pal(8, "Set2"))
# change point size & alpha
  #pb_mds$layers[[1]]$aes_params$size <- 5
  #pb_mds$layers[[1]]$aes_params$alpha <- 0.6
print(pb_mds)

#GSEA with GO for Fibroblast Clusters

markerGenesExpDH <- read.delim("~/Desktop/HumanHeartCarTrans2/analysis/ExpDHmarkerGenesRNA_snn_res.0.4", header = TRUE, sep = "\t")

## adjust table
markerG <- markerGenesExpDH %>% 
  filter(avg_log2FC > 0.5) %>%
  filter(cluster %in% c(0, 4, 14)) %>%  # Fb1 = Cluster 0, Fb2 = Cluster 4, Fb 3 = Cluster 14
  mutate(Gene = sub("^.*\\.", "", gene),
         EnsID = sub("\\..*", "", gene))

## GSEA 
ego <- enrichGO(gene = unique(markerG$EnsID),
                          OrgDb = "org.Hs.eg.db",
                          keyType = 'ENSEMBL',
                          ont = "BP",
                          pAdjustMethod = "BH",
                          pvalueCutoff = 0.05,
                          qvalueCutoff = 0.05)
ego <- setReadable(ego, OrgDb = org.Hs.eg.db)
dotplot(ego, showCategory=30)

GSEA for top 500 genes all clusters

# Convert to a numeric column for plotting
convert_gene_ratio <- function(df) {
  df %>% mutate(GeneRatioNum = sapply(GeneRatio, function(x) {
    parts <- strsplit(x, "/")[[1]]
    as.numeric(parts[1]) / as.numeric(parts[2])
  }))
}

split_results <- lapply(split_results, convert_gene_ratio)

dotplot_list <- lapply(names(split_results), function(cluster_name) {
  df <- split_results[[cluster_name]]
  
  # limit to top N GO terms
  df <- df %>% arrange(p.adjust) %>% head(30)

  ggplot(df, aes(x = GeneRatioNum, y = reorder(Description, GeneRatioNum))) +
    geom_point(aes(size = Count, color = p.adjust)) +
    scale_color_continuous(low = "red", high = "blue", name = "Adj. p-value") +
    scale_size_continuous(name = "Gene Count") +
    labs(title = paste("GO Enrichment for Cluster", cluster_name),
         x = "Gene Ratio",
         y = "GO Term") +
    theme_bw(base_size = 10) +
    theme(legend.position = "right")
})

# Name the plots list
names(dotplot_list) <- names(split_results)

#Display plots
for (p in dotplot_list) {
  print(p)
}

# GSEA for top 500 genes divided by condition ## by all clusters

Visualize GSEA per condition as dotplot

# Convert to a numeric column for plotting
convert_gene_ratio_con <- function(df) {
  df %>% mutate(GeneRatioNum = sapply(GeneRatio, function(x) {
    parts <- strsplit(x, "/")[[1]]
    as.numeric(parts[1]) / as.numeric(parts[2])
  }))
}

split_results_con <- lapply(split_results_con, convert_gene_ratio)

dotplot_list_con <- lapply(names(split_results_con), function(cluster_name) {
  df <- split_results_con[[cluster_name]]
  
  # limit to top N GO terms
  df <- df %>% arrange(p.adjust) %>% head(30)

  ggplot(df, aes(x = GeneRatioNum, y = reorder(Description, GeneRatioNum))) +
    geom_point(aes(size = Count, color = p.adjust)) +
    scale_color_continuous(low = "red", high = "blue", name = "Adj. p-value") +
    scale_size_continuous(name = "Gene Count") +
    labs(title = paste("GO Enrichment for Cluster", cluster_name),
         x = "Gene Ratio",
         y = "GO Term") +
    theme_bw(base_size = 10) +
    theme(legend.position = "right")
})

# Name the plots list
names(dotplot_list_con) <- names(split_results_con)

# save dotplots as pdf in a loop
for (name in names(dotplot_list_con)) {
  ggsave(filename = paste0("GO_dotplot_", name, ".pdf"),
         plot = dotplot_list_con[[name]],
         width = 9,
         height = 8)
}


## session info

``` r
date()
## [1] "Thu Oct  9 15:36:08 2025"
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] NCmisc_1.2.0                VennDiagram_1.7.3           futile.logger_1.4.3        
##  [4] ggupset_0.4.1               gridExtra_2.3               DOSE_4.2.0                 
##  [7] enrichplot_1.28.4           msigdbr_25.1.1              org.Hs.eg.db_3.21.0        
## [10] AnnotationDbi_1.70.0        clusterProfiler_4.16.0      multtest_2.64.0            
## [13] metap_1.12                  scater_1.35.0               scuttle_1.18.0             
## [16] destiny_3.22.0              circlize_0.4.16             edgeR_4.6.3                
## [19] limma_3.64.3                muscat_1.22.0               viridis_0.6.5              
## [22] viridisLite_0.4.2           lubridate_1.9.4             forcats_1.0.1              
## [25] stringr_1.5.2               purrr_1.1.0                 readr_2.1.5                
## [28] tidyr_1.3.1                 tibble_3.3.0                tidyverse_2.0.0            
## [31] dplyr_1.1.4                 SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [34] Biobase_2.68.0              GenomicRanges_1.60.0        GenomeInfoDb_1.44.3        
## [37] IRanges_2.42.0              S4Vectors_0.46.0            BiocGenerics_0.54.0        
## [40] generics_0.1.4              MatrixGenerics_1.20.0       matrixStats_1.5.0          
## [43] pheatmap_1.0.13             ggpubr_0.6.1                ggplot2_4.0.0              
## [46] Seurat_5.3.0                SeuratObject_5.2.0          sp_2.2-0                   
## 
## loaded via a namespace (and not attached):
##   [1] igraph_2.1.4             ica_1.0-3                plotly_4.11.0           
##   [4] Formula_1.2-5            tidyselect_1.2.1         bit_4.6.0               
##   [7] doParallel_1.0.17        clue_0.3-66              lattice_0.22-7          
##  [10] rjson_0.2.23             blob_1.2.4               S4Arrays_1.8.1          
##  [13] pbkrtest_0.5.5           parallel_4.5.1           png_0.1-8               
##  [16] plotrix_3.8-4            cli_3.6.5                ggplotify_0.1.3         
##  [19] goftest_1.2-3            VIM_6.2.6                variancePartition_1.38.1
##  [22] textshaping_1.0.3        BiocNeighbors_2.2.0      uwot_0.2.3              
##  [25] curl_7.0.0               mime_0.13                evaluate_1.0.5          
##  [28] tidytree_0.4.6           ComplexHeatmap_2.24.1    stringi_1.8.7           
##  [31] backports_1.5.0          lmerTest_3.1-3           qqconf_1.3.2            
##  [34] httpuv_1.6.16            magrittr_2.0.4           rappdirs_0.3.3          
##  [37] splines_4.5.1            sctransform_0.4.2        ggbeeswarm_0.7.2        
##  [40] DBI_1.2.3                jquerylib_0.1.4          smoother_1.3            
##  [43] withr_3.0.2              corpcor_1.6.10           systemfonts_1.2.3       
##  [46] reformulas_0.4.1         class_7.3-23             lmtest_0.9-40           
##  [49] formatR_1.14             htmlwidgets_1.6.4        fs_1.6.6                
##  [52] ggrepel_0.9.6            labeling_0.4.3           fANCOVA_0.6-1           
##  [55] SparseArray_1.8.1        DESeq2_1.48.2            ranger_0.17.0           
##  [58] DEoptimR_1.1-4           reticulate_1.43.0        hexbin_1.28.5           
##  [61] zoo_1.8-14               XVector_0.48.0           knitr_1.50              
##  [64] ggplot.multistats_1.0.1  UCSC.utils_1.4.0         RhpcBLASctl_0.23-42     
##  [67] timechange_0.3.0         foreach_1.5.2            dittoSeq_1.20.0         
##  [70] patchwork_1.3.2          caTools_1.18.3           data.table_1.17.8       
##  [73] ggtree_3.16.3            R.oo_1.27.1              RSpectra_0.16-2         
##  [76] irlba_2.3.5.1            fastDummies_1.7.5        gridGraphics_0.5-1      
##  [79] lazyeval_0.2.2           yaml_2.3.10              survival_3.8-3          
##  [82] scattermore_1.2          crayon_1.5.3             RcppAnnoy_0.0.22        
##  [85] RColorBrewer_1.1-3       progressr_0.16.0         later_1.4.4             
##  [88] ggridges_0.5.7           codetools_0.2-20         GlobalOptions_0.1.2     
##  [91] aod_1.3.3                KEGGREST_1.48.1          Rtsne_0.17              
##  [94] shape_1.4.6.1            pkgconfig_2.0.3          TMB_1.9.17              
##  [97] spatstat.univar_3.1-4    mathjaxr_1.8-0           EnvStats_3.1.0          
## [100] aplot_0.2.9              scatterplot3d_0.3-44     spatstat.sparse_3.1-0   
## [103] ape_5.8-1                xtable_1.8-4             car_3.1-3               
## [106] plyr_1.8.9               httr_1.4.7               rbibutils_2.3           
## [109] tools_4.5.1              globals_0.18.0           beeswarm_0.4.0          
## [112] broom_1.0.10             nlme_3.1-168             lambda.r_1.2.4          
## [115] assertthat_0.2.1         lme4_1.1-37              digest_0.6.37           
## [118] numDeriv_2016.8-1.1      Matrix_1.7-4             farver_2.1.2            
## [121] tzdb_0.5.0               remaCor_0.0.20           reshape2_1.4.4          
## [124] yulab.utils_0.2.1        glue_1.8.0               cachem_1.1.0            
## [127] polyclip_1.10-7          Biostrings_2.76.0        mvtnorm_1.3-3           
## [130] parallelly_1.45.1        mnormt_2.1.1             statmod_1.5.0           
## [133] ragg_1.5.0               RcppHNSW_0.6.0           ScaledMatrix_1.16.0     
## [136] carData_3.0-5            minqa_1.2.8              pbapply_1.7-4           
## [139] vroom_1.6.6              spam_2.11-1              gson_0.1.0              
## [142] gtools_3.9.5             ggsignif_0.6.4           RcppEigen_0.3.4.0.2     
## [145] shiny_1.11.1             GenomeInfoDbData_1.2.14  glmmTMB_1.1.12          
## [148] R.utils_2.13.0           memoise_2.0.1            rmarkdown_2.30          
## [151] scales_1.4.0             R.methodsS3_1.8.2        future_1.67.0           
## [154] RANN_2.6.2               spatstat.data_3.1-8      rstudioapi_0.17.1       
## [157] cluster_2.1.8.1          mutoss_0.1-13            spatstat.utils_3.2-0    
## [160] hms_1.1.3                fitdistrplus_1.2-4       cowplot_1.2.0           
## [163] colorspace_2.1-2         rlang_1.1.6              xts_0.14.1              
## [166] dotCall64_1.2            ggtangle_0.0.7           laeken_0.5.3            
## [169] mgcv_1.9-3               xfun_0.53                e1071_1.7-16            
## [172] TH.data_1.1-4            iterators_1.0.14         abind_1.4-8             
## [175] GOSemSim_2.34.0          treeio_1.32.0            futile.options_1.0.1    
## [178] bitops_1.0-9             Rdpack_2.6.4             promises_1.3.3          
## [181] RSQLite_2.4.3            qvalue_2.40.0            sandwich_3.1-1          
## [184] fgsea_1.34.2             DelayedArray_0.34.1      proxy_0.4-27            
## [187] GO.db_3.21.0             compiler_4.5.1           prettyunits_1.2.0       
## [190] boot_1.3-32              beachmat_2.24.0          listenv_0.9.1           
## [193] Rcpp_1.1.0               BiocSingular_1.24.0      tensor_1.5.1            
## [196] MASS_7.3-65              progress_1.2.3           BiocParallel_1.42.2     
## [199] babelgene_22.9           spatstat.random_3.4-2    R6_2.6.1                
## [202] fastmap_1.2.0            multcomp_1.4-28          fastmatch_1.1-6         
## [205] rstatix_0.7.2            vipor_0.4.7              TTR_0.24.4              
## [208] ROCR_1.0-11              TFisher_0.2.0            rsvd_1.0.5              
## [211] vcd_1.4-13               nnet_7.3-20              gtable_0.3.6            
## [214] KernSmooth_2.23-26       miniUI_0.1.2             deldir_2.0-4            
## [217] htmltools_0.5.8.1        ggthemes_5.1.0           bit64_4.6.0-1           
## [220] spatstat.explore_3.5-3   lifecycle_1.0.4          blme_1.0-6              
## [223] S7_0.2.0                 nloptr_2.2.1             sass_0.4.10             
## [226] vctrs_0.6.5              robustbase_0.99-6        spatstat.geom_3.6-0     
## [229] sn_2.1.1                 ggfun_0.2.0              future.apply_1.20.0     
## [232] bslib_0.9.0              pillar_1.11.1            gplots_3.2.0            
## [235] pcaMethods_2.0.0         locfit_1.5-9.12          jsonlite_2.0.0          
## [238] GetoptLong_1.0.5